import scrapy


class ItcastSpider(scrapy.Spider):
    """Spider that extracts a heading and link targets from local test pages.

    Each URL in ``start_urls`` is requested by Scrapy and the response is
    handed to :meth:`parse`, which yields one item per page.
    """

    # Name used to select this spider on the command line
    # (``scrapy crawl itcast``); it must be unique within the project.
    name = "itcast"
    # allowed_domains = ["xxx.com"]  # domains the spider is allowed to crawl
    # Starting URLs: Scrapy automatically sends a request for every entry.
    start_urls = [
        "http://127.0.0.1:5000/jar",
        "http://127.0.0.1:5000/bobo",
        "http://127.0.0.1:5000/test",
    ]

    def parse(self, response):
        """Extract the page heading and link hrefs from one response.

        Args:
            response: The downloaded page passed in by Scrapy.

        Yields:
            dict: ``{"title": str | None, "urls": list[str]}`` where
            ``title`` is the first matching ``<h2>`` text (``None`` when no
            node matches) and ``urls`` holds every matching ``href`` value
            exactly as written in the page (possibly empty).
        """
        # ``xpath`` returns a list of Selector objects; ``get`` pulls the
        # first match as a string (or None), ``getall`` pulls all matches.
        title = response.xpath("//div[@class='container']/h2/text()").get()
        urls = response.xpath("//div[@class='container']/span/a/@href").getall()

        # Log through Scrapy's logger rather than print so the output obeys
        # the configured log level and destination.
        self.logger.info("title=%r urls=%r", title, urls)

        # Yield the data so item pipelines and feed exports receive it;
        # printing alone discards the scraped result.
        yield {"title": title, "urls": urls}


